# -*- coding:utf-8 -*-
import urlparse,re,urllib2,MySQLdb,datetime,time
# (keyword, merchant name) pairs; tested in order, the first keyword found wins.
_SHANGJIA_KEYWORDS = (
    ('gome', '国美'),
    ('coo8', '库巴'),
    ('51buy', '易迅'),
    ('360buy', '京东'),
    ('dangdang', '当当'),
    ('suning', '苏宁'),
)

# Compiled once at import time instead of on every loop iteration.
_LI_PATT = re.compile(r'<li>[\s\S]+?</li>')
_HREF_PATT = re.compile(r'<a href="([\s\S]+?)"')
_CELL_PATT = re.compile(r'>([\s\S]+?)<')


def _get_shangjia(data):
    """Return the merchant name (UTF-8 byte string) whose keyword occurs in *data*.

    Falls back to the catch-all name when no known keyword is present.
    """
    for keyword, name in _SHANGJIA_KEYWORDS:
        if keyword in data:
            return name
    return '其它'


def _parse_li(li):
    """Extract the merchant name and the raw value cells from one <li> fragment.

    Returns a ``(shangjia, cells)`` tuple. ``shangjia`` is a unicode merchant
    name, or None when the fragment has no ``<a href="...">``. ``cells`` holds
    the first ``>text<`` capture of every line after the third one.
    """
    shangjia = None
    link = _HREF_PATT.search(li)
    if link:
        shangjia = _get_shangjia(link.group(1)).decode('utf-8')

    cells = []
    # The first three lines of a fragment are skipped, as in the original
    # 1-based ``line_num > 3`` check.
    for line in li.split('\n')[3:]:
        cell = _CELL_PATT.search(line)
        if cell:
            cells.append(cell.group(1))
    return shangjia, cells


def main(update_time):
    """Download the etao ranking page and insert one row per ranking <li>.

    For every ``<li>...</li>`` fragment of the page, the merchant is derived
    from the first ``<a href>`` and the first four value cells are converted
    to integers and stored as ``jin``, ``yin``, ``tong`` and ``total`` in the
    ``paihangbang`` table together with *update_time*.

    Fragments without a merchant link, with fewer than four value cells, or
    with non-integer values are reported on stdout and skipped instead of
    aborting the run (or reusing the previous merchant name).

    Args:
        update_time: value written to the ``update_time`` column of each row.

    Raises:
        urllib2.URLError: if the page cannot be downloaded.
        MySQLdb.Error: if connecting, inserting or committing fails.
    """
    url = 'http://www.etao.com/pk/index.php?cat=350401'

    # Fetch the page first so no database connection is held open while
    # waiting on the network.
    res = urllib2.urlopen(url)
    try:
        content = res.read()
    finally:
        res.close()

    sql = ('insert into paihangbang (shangjia,jin,yin,tong,total,update_time) '
           'values(%s,%s,%s,%s,%s,%s)')

    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration.
    cxn = MySQLdb.connect(db='duibi', host='localhost', user='root',
                          passwd='123456ms')
    try:
        cxn.set_character_set('utf8')
        cur = cxn.cursor()
        try:
            for li in _LI_PATT.findall(content):
                shangjia, jp_list = _parse_li(li)
                if shangjia is not None:
                    print(shangjia)
                print(jp_list)

                if shangjia is None or len(jp_list) < 4:
                    print('skipping <li>: no merchant link or fewer than four values')
                    continue
                try:
                    jin, yin, tong, total = [int(v) for v in jp_list[:4]]
                except ValueError:
                    print('skipping <li>: non-integer value')
                    continue

                cur.execute(sql, (shangjia, jin, yin, tong, total, update_time))

            # Commit only after every fragment has been processed.
            cxn.commit()
        finally:
            cur.close()
    finally:
        # Always release the connection, even when parsing or an insert fails.
        cxn.close()

# Script entry point: run the scrape-and-store job once.
# NOTE(review): the literal string 'time' is what gets stored as update_time
# for every row; presumably a real timestamp was intended -- confirm against
# the paihangbang table schema and the callers of main().
if __name__=='__main__':
    main('time')







